pew %>% gather(2:7, key = "income_bracket", value = "count")
billboard %>% gather(6:81, key = "week", value = "rank")
weather %>% gather(d1:d8, key = "day", value = "temp") %>% spread(key = "element", value = "temp")
separate()ed.unite()ed.A
## # A tibble: 6 x 3 ## country year rate ## * <chr> <int> <chr> ## 1 Afghanistan 1999 745/19987071 ## 2 Afghanistan 2000 2666/20595360 ## 3 Brazil 1999 37737/172006362 ## 4 Brazil 2000 80488/174504898 ## 5 China 1999 212258/1272915272 ## 6 China 2000 213766/1280428583
A %>%
separate(rate, into = c("cases", "population"))
## # A tibble: 6 x 4 ## country year cases population ## * <chr> <int> <chr> <chr> ## 1 Afghanistan 1999 745 19987071 ## 2 Afghanistan 2000 2666 20595360 ## 3 Brazil 1999 37737 172006362 ## 4 Brazil 2000 80488 174504898 ## 5 China 1999 212258 1272915272 ## 6 China 2000 213766 1280428583
A %>%
separate(rate, into = c("cases", "population"), sep = "/")
## # A tibble: 6 x 4 ## country year cases population ## * <chr> <int> <chr> <chr> ## 1 Afghanistan 1999 745 19987071 ## 2 Afghanistan 2000 2666 20595360 ## 3 Brazil 1999 37737 172006362 ## 4 Brazil 2000 80488 174504898 ## 5 China 1999 212258 1272915272 ## 6 China 2000 213766 1280428583
A %>%
separate(rate, into = c("cases", "population"), convert = TRUE)
## # A tibble: 6 x 4 ## country year cases population ## * <chr> <int> <int> <int> ## 1 Afghanistan 1999 745 19987071 ## 2 Afghanistan 2000 2666 20595360 ## 3 Brazil 1999 37737 172006362 ## 4 Brazil 2000 80488 174504898 ## 5 China 1999 212258 1272915272 ## 6 China 2000 213766 1280428583
unite() is a less common operation than separate() (though it is needed for Exercise 5.7). Imagine you were working with this (butchered) form of data set C.
C2 <- C %>%
separate(year, into = c("century", "year_in_century"), sep = 2)
C2
## # A tibble: 6 x 5 ## country century year_in_century cases population ## <chr> <chr> <chr> <int> <int> ## 1 Afghanistan 19 99 745 19987071 ## 2 Afghanistan 20 00 2666 20595360 ## 3 Brazil 19 99 37737 172006362 ## 4 Brazil 20 00 80488 174504898 ## 5 China 19 99 212258 1272915272 ## 6 China 20 00 213766 1280428583
We need to unite() the century and year_in_century columns.
C2 %>% unite(century, year_in_century, col = "year", sep = "")
## # A tibble: 6 x 4 ## country year cases population ## <chr> <chr> <int> <int> ## 1 Afghanistan 1999 745 19987071 ## 2 Afghanistan 2000 2666 20595360 ## 3 Brazil 1999 37737 172006362 ## 4 Brazil 2000 80488 174504898 ## 5 China 1999 212258 1272915272 ## 6 China 2000 213766 1280428583
gather() the columns into values. spread()ing the values across the columns. separate()ed. unite()ed.
Do you ever find yourself with .Rmd files that look like this?
my_df1 %>% ... # do some stuff to my_df1 ... my_df2 %>% ... # do the same stuff to my_df2 ... my_df3 %>% ... # and again to my_df3 ...
What if I want to draw the same kind of plot several times?
# The same point-and-line plot written out three times; only the data frame
# and the column names mapped to x, y and color change between copies.
my_df1 %>%
  ggplot(aes(x = var1, y = var2, color = var3)) +
  geom_point() +
  geom_line()

my_df2 %>%
  ggplot(aes(x = varA, y = varB, color = varC)) +
  geom_point() +
  geom_line()

my_df3 %>%
  ggplot(aes(x = var1A, y = var2B, color = var3C)) +
  geom_point() +
  geom_line()
name_of_function <- function(data, var = "value") {
. . .
. . .
<valid R code>
. . .
. . .
return(x)
}
data, var
data is required. var is optional - it has a default value of "value". The function returns x.
library(tidyverse)
# Look up a single car model in the `mpg` data set.
#
# mod: model name, compared for exact equality with the `model` column.
#
# Returns the rows of `mpg` whose `model` equals `mod`; a zero-row tibble
# comes back when no model matches.
my_cars <- function(mod) {
  filter(mpg, model == mod)
}
my_cars("protege")
## # A tibble: 0 x 11 ## # ... with 11 variables: manufacturer <chr>, model <chr>, displ <dbl>, ## # year <int>, cyl <int>, trans <chr>, drv <chr>, cty <int>, hwy <int>, ## # fl <chr>, class <chr>
# Look up a single car model in the `mpg` data set.
#
# mod: model name, compared for exact equality with the `model` column.
#      Optional; when omitted the function looks up "civic".
#
# Returns the rows of `mpg` whose `model` equals `mod`.
my_cars <- function(mod = "civic") {
  filter(mpg, model == mod)
}
my_cars()
## # A tibble: 9 x 11 ## manufacturer model displ year cyl trans drv cty hwy fl class ## <chr> <chr> <dbl> <int> <int> <chr> <chr> <int> <int> <chr> <chr> ## 1 honda civic 1.60 1999 4 manu… f 28 33 r subc… ## 2 honda civic 1.60 1999 4 auto… f 24 32 r subc… ## 3 honda civic 1.60 1999 4 manu… f 25 32 r subc… ## 4 honda civic 1.60 1999 4 manu… f 23 29 p subc… ## 5 honda civic 1.60 1999 4 auto… f 24 32 r subc… ## 6 honda civic 1.80 2008 4 manu… f 26 34 r subc… ## 7 honda civic 1.80 2008 4 auto… f 25 36 r subc… ## 8 honda civic 1.80 2008 4 auto… f 24 36 c subc… ## 9 honda civic 2.00 2008 4 manu… f 21 29 p subc…
my_cars("jetta")
## # A tibble: 9 x 11 ## manufacturer model displ year cyl trans drv cty hwy fl class ## <chr> <chr> <dbl> <int> <int> <chr> <chr> <int> <int> <chr> <chr> ## 1 volkswagen jetta 1.90 1999 4 manu… f 33 44 d comp… ## 2 volkswagen jetta 2.00 1999 4 manu… f 21 29 r comp… ## 3 volkswagen jetta 2.00 1999 4 auto… f 19 26 r comp… ## 4 volkswagen jetta 2.00 2008 4 auto… f 22 29 p comp… ## 5 volkswagen jetta 2.00 2008 4 manu… f 21 29 p comp… ## 6 volkswagen jetta 2.50 2008 5 auto… f 21 29 r comp… ## 7 volkswagen jetta 2.50 2008 5 manu… f 21 29 r comp… ## 8 volkswagen jetta 2.80 1999 6 auto… f 16 23 r comp… ## 9 volkswagen jetta 2.80 1999 6 manu… f 17 24 r comp…
my_cars("camry") %>%
head(2)
## # A tibble: 2 x 11 ## manufacturer model displ year cyl trans drv cty hwy fl class ## <chr> <chr> <dbl> <int> <int> <chr> <chr> <int> <int> <chr> <chr> ## 1 toyota camry 2.20 1999 4 manu… f 21 29 r mids… ## 2 toyota camry 2.20 1999 4 auto… f 21 27 r mids…
my_cars(mod = "corolla") %>% head(2)
## # A tibble: 2 x 11 ## manufacturer model displ year cyl trans drv cty hwy fl class ## <chr> <chr> <dbl> <int> <int> <chr> <chr> <int> <int> <chr> <chr> ## 1 toyota coro… 1.80 1999 4 auto… f 24 30 r comp… ## 2 toyota coro… 1.80 1999 4 auto… f 24 33 r comp…
Pay attention to:
What does this do?
# Find the year in which a given name had its largest total proportion.
#
# data:     data frame with `name`, `year` and `prop` columns.
# name_arg: the name to look up, matched exactly against `name`.
#
# Returns a data frame with a single `year` column holding the year whose
# summed `prop` (over the rows matching `name_arg`) is largest.
most_popular_year <- function(data, name_arg) {
  # Keep only the rows for the requested name.
  matching <- filter(data, name == name_arg)

  # One row per year, carrying the summed proportion for that year.
  yearly <- summarize(group_by(matching, year), total = sum(prop))

  # Largest total first, then keep just the top row's year.
  ranked <- arrange(yearly, desc(total))
  select(head(ranked, 1), year)
}
# Load the babynames data, then call the function with both arguments named.
library(babynames)

most_popular_year(data = babynames, name_arg = "Andrew")
## # A tibble: 1 x 1 ## year ## <dbl> ## 1 1987
most_popular_year(babynames, "Andrew")
## # A tibble: 1 x 1 ## year ## <dbl> ## 1 1987
# most_popular_year("Andrew")
For the following exercises, use the pnwflights14 dataset.
https://data.oregon.gov/Business/New-Businesses-Registered-Last-Month/esjy-u4fc
library(tidyverse)
biz <- read_csv("../data/New_Businesses_Registered_Last_Month.csv")
?read_csv
str(biz)
## Classes 'tbl_df', 'tbl' and 'data.frame': 19674 obs. of 17 variables: ## $ Registry Number : int 139275697 139275697 139253991 139253991 139253991 139277594 139277594 139255491 139255491 139255491 ... ## $ Business Name : chr "A PIECE OF HEAVEN ADULT CARE HOME" "A PIECE OF HEAVEN ADULT CARE HOME" "A. RE PLUMBING LLC" "A. RE PLUMBING LLC" ... ## $ Entity Type : chr "ASSUMED BUSINESS NAME" "ASSUMED BUSINESS NAME" "DOMESTIC LIMITED LIABILITY COMPANY" "DOMESTIC LIMITED LIABILITY COMPANY" ... ## $ Registry Date : chr "01/02/2018" "01/02/2018" "01/02/2018" "01/02/2018" ... ## $ Associated Name Type : chr "AUTHORIZED REPRESENTATIVE" "PRINCIPAL PLACE OF BUSINESS" "MAILING ADDRESS" "PRINCIPAL PLACE OF BUSINESS" ... ## $ First Name : chr "OLIMPIA" NA NA NA ... ## $ Middle Name : chr "V" NA NA NA ... ## $ Last Name : chr "URSU" NA NA NA ... ## $ Suffix : chr NA NA NA NA ... ## $ Not of Record Entity : chr NA NA NA NA ... ## $ Entity of Record Reg Number: int NA NA NA NA NA NA NA NA NA NA ... ## $ Entity of Record Name : chr NA NA NA NA ... ## $ Address : chr "13460 SE RUSK RD" "13460 SE RUSK RD" "241 NW HOWARD LN" "241 NW HOWARD LN" ... ## $ Address Continued : chr NA NA NA NA ... ## $ City : chr "MILWAUKIE" "MILWAUKIE" "DALLAS" "DALLAS" ... ## $ State : chr "OR" "OR" "OR" "OR" ... ## $ Zip Code : chr "97222" "97222" "97338" "97338" ... ## - attr(*, "spec")=List of 2 ## ..$ cols :List of 17 ## .. ..$ Registry Number : list() ## .. .. ..- attr(*, "class")= chr "collector_integer" "collector" ## .. ..$ Business Name : list() ## .. .. ..- attr(*, "class")= chr "collector_character" "collector" ## .. ..$ Entity Type : list() ## .. .. ..- attr(*, "class")= chr "collector_character" "collector" ## .. ..$ Registry Date : list() ## .. .. ..- attr(*, "class")= chr "collector_character" "collector" ## .. ..$ Associated Name Type : list() ## .. .. ..- attr(*, "class")= chr "collector_character" "collector" ## .. ..$ First Name : list() ## .. .. 
..- attr(*, "class")= chr "collector_character" "collector" ## .. ..$ Middle Name : list() ## .. .. ..- attr(*, "class")= chr "collector_character" "collector" ## .. ..$ Last Name : list() ## .. .. ..- attr(*, "class")= chr "collector_character" "collector" ## .. ..$ Suffix : list() ## .. .. ..- attr(*, "class")= chr "collector_character" "collector" ## .. ..$ Not of Record Entity : list() ## .. .. ..- attr(*, "class")= chr "collector_character" "collector" ## .. ..$ Entity of Record Reg Number: list() ## .. .. ..- attr(*, "class")= chr "collector_integer" "collector" ## .. ..$ Entity of Record Name : list() ## .. .. ..- attr(*, "class")= chr "collector_character" "collector" ## .. ..$ Address : list() ## .. .. ..- attr(*, "class")= chr "collector_character" "collector" ## .. ..$ Address Continued : list() ## .. .. ..- attr(*, "class")= chr "collector_character" "collector" ## .. ..$ City : list() ## .. .. ..- attr(*, "class")= chr "collector_character" "collector" ## .. ..$ State : list() ## .. .. ..- attr(*, "class")= chr "collector_character" "collector" ## .. ..$ Zip Code : list() ## .. .. ..- attr(*, "class")= chr "collector_character" "collector" ## ..$ default: list() ## .. ..- attr(*, "class")= chr "collector_guess" "collector" ## ..- attr(*, "class")= chr "col_spec"
glimpse(biz)
## Observations: 19,674 ## Variables: 17 ## $ `Registry Number` <int> 139275697, 139275697, 139253991,... ## $ `Business Name` <chr> "A PIECE OF HEAVEN ADULT CARE HO... ## $ `Entity Type` <chr> "ASSUMED BUSINESS NAME", "ASSUME... ## $ `Registry Date` <chr> "01/02/2018", "01/02/2018", "01/... ## $ `Associated Name Type` <chr> "AUTHORIZED REPRESENTATIVE", "PR... ## $ `First Name` <chr> "OLIMPIA", NA, NA, NA, "ANTHONY"... ## $ `Middle Name` <chr> "V", NA, NA, NA, NA, NA, "A", NA... ## $ `Last Name` <chr> "URSU", NA, NA, NA, "RE", NA, "G... ## $ Suffix <chr> NA, NA, NA, NA, NA, NA, NA, NA, ... ## $ `Not of Record Entity` <chr> NA, NA, NA, NA, NA, NA, NA, NA, ... ## $ `Entity of Record Reg Number` <int> NA, NA, NA, NA, NA, NA, NA, NA, ... ## $ `Entity of Record Name` <chr> NA, NA, NA, NA, NA, NA, NA, NA, ... ## $ Address <chr> "13460 SE RUSK RD", "13460 SE RU... ## $ `Address Continued` <chr> NA, NA, NA, NA, NA, NA, NA, NA, ... ## $ City <chr> "MILWAUKIE", "MILWAUKIE", "DALLA... ## $ State <chr> "OR", "OR", "OR", "OR", "OR", "O... ## $ `Zip Code` <chr> "97222", "97222", "97338", "9733...
Q1: What is the trend in new business licenses over the last month?
Are these distinct businesses?
biz %>% head(3)
## # A tibble: 3 x 17 ## `Registry Number` `Business Name` `Entity Type` `Registry Date` ## <int> <chr> <chr> <chr> ## 1 139275697 A PIECE OF HEAVEN … ASSUMED BUSINESS … 01/02/2018 ## 2 139275697 A PIECE OF HEAVEN … ASSUMED BUSINESS … 01/02/2018 ## 3 139253991 A. RE PLUMBING LLC DOMESTIC LIMITED … 01/02/2018 ## # ... with 13 more variables: `Associated Name Type` <chr>, `First ## # Name` <chr>, `Middle Name` <chr>, `Last Name` <chr>, Suffix <chr>, ## # `Not of Record Entity` <chr>, `Entity of Record Reg Number` <int>, ## # `Entity of Record Name` <chr>, Address <chr>, `Address ## # Continued` <chr>, City <chr>, State <chr>, `Zip Code` <chr>
Q1: What is the trend in new business licenses over the last month?
biz <- biz %>%
distinct(`Business Name`, .keep_all = TRUE)
biz
## # A tibble: 7,291 x 17 ## `Registry Number` `Business Name` `Entity Type` `Registry Date` ## <int> <chr> <chr> <chr> ## 1 139275697 A PIECE OF HEAVEN … ASSUMED BUSINESS… 01/02/2018 ## 2 139253991 A. RE PLUMBING LLC DOMESTIC LIMITED… 01/02/2018 ## 3 139277594 ADVANCED CANDY AND… DOMESTIC BUSINES… 01/02/2018 ## 4 139255491 ALEJANDRAS LAWN SE… DOMESTIC LIMITED… 01/02/2018 ## 5 139145395 ANIMATION SEO INC. DOMESTIC BUSINES… 01/02/2018 ## 6 139232797 BEAUTIFUL BLINDS A… DOMESTIC LIMITED… 01/02/2018 ## 7 139253090 BETHRAL LLC DOMESTIC LIMITED… 01/02/2018 ## 8 139246599 BOND CONSTRUCTION … DOMESTIC LIMITED… 01/02/2018 ## 9 139257794 D&C GOODS LLC DOMESTIC LIMITED… 01/02/2018 ## 10 139276190 D&D AFFORDABLE TIR… DOMESTIC LIMITED… 01/02/2018 ## # ... with 7,281 more rows, and 13 more variables: `Associated Name ## # Type` <chr>, `First Name` <chr>, `Middle Name` <chr>, `Last ## # Name` <chr>, Suffix <chr>, `Not of Record Entity` <chr>, `Entity of ## # Record Reg Number` <int>, `Entity of Record Name` <chr>, ## # Address <chr>, `Address Continued` <chr>, City <chr>, State <chr>, ## # `Zip Code` <chr>
Q1: What is the trend in new business licenses over the last month?
Here is our key column:
head(biz$`Registry Date`, 3)
## [1] "01/02/2018" "01/02/2018" "01/02/2018"
class(biz$`Registry Date`)
## [1] "character"
But we'd like a data frame that can do this:
ggplot(biz, aes(x = date, y = count)) + geom_line()
integer, numeric, logical, character, factor
A package to represent datetime data, do operations on it, and output it in various formats.
You can create <date> data using a variety of functions tailored to the format of the character string.
library(lubridate)
ymd("2018-02-22")
## [1] "2018-02-22"
mdy("February 22nd, 2018")
## [1] "2018-02-22"
dmy("22-Feb-2018")
## [1] "2018-02-22"
You can create <dttm> (date time) data by extending the same syntax.
mdy_hm("02/22/2018 06:26")
## [1] "2018-02-22 06:26:00 UTC"
You can also cobble together a <dttm> from across multiple columns.
flights %>% select(year, month, day, hour, minute) %>% mutate(departure = make_datetime(year, month, day, hour, minute))
Once you have data represented as a date-time, it's easy to pull out components that you're interested in.
# Capture the current date-time once so every component extracted from `now`
# refers to the same instant, then print it.
now <- now()
now
## [1] "2018-02-22 19:19:30 PST"
day(now)
## [1] 22
hour(now)
## [1] 19
minute(now)
## [1] 19
wday(now)
## [1] 5
wday(now, label = TRUE)
## [1] Thu ## Levels: Sun < Mon < Tue < Wed < Thu < Fri < Sat
wday(now, label = TRUE, abbr = FALSE)
## [1] Thursday ## 7 Levels: Sunday < Monday < Tuesday < Wednesday < Thursday < ... < Saturday
# Time from today until April 27, 2018.
# The date is passed as a string: a bare 04272018 is the number 4272018, so
# the leading zero of the month would be dropped before parsing.
joy <- mdy("04/27/2018") - today()
class(joy)
## [1] "difftime"
joy
## Time difference of 64 days
# Convert the difftime into a lubridate duration, then print it.
joy <- as.duration(joy)
joy
## [1] "5529600s (~9.14 weeks)"
Use pnwflights14 to answer the following questions.
d1 <- "January 1, 2010"
d2 <- "2015-Mar-07"
d3 <- "06-Jun-2017"
d4 <- c("August 19 (2015)", "July 1 (2015)")
d5 <- "12/30/14" # Dec 30, 2014
Using the pnwflights14 flights data, on what day of the week should you leave PDX if you want to minimize the chance of a delay?
d <- dmy(biz$`Registry Date`)
## Warning: 4364 failed to parse.
?dmy
# The registry dates are written month/day/year, so mdy() is the parser
# that matches them.
d <- mdy(biz$`Registry Date`)
class(d)
## [1] "Date"
head(d)
## [1] "2018-01-02" "2018-01-02" "2018-01-02" "2018-01-02" "2018-01-02" ## [6] "2018-01-02"
Q1: What is the trend in new business licenses over the last month?
First a barchart.
# Add a parsed `registry_date` column built from the character
# `Registry Date` column (month/day/year).
biz <- biz %>%
  mutate(registry_date = mdy(`Registry Date`))

# One bar per registration date; geom_bar() counts the rows for each date.
ggplot(biz, aes(x = registry_date)) +
  geom_bar()
Q1: What is the trend in new business licenses over the last month?
Let's try a line chart
biz %>% group_by(registry_date) %>% summarize(count = n()) %>% ggplot(aes(x = registry_date, y = count)) + geom_line()
Q2: What is the weekly cycle in new business licenses?
biz %>% mutate(day_of_week = wday(registry_date, label = TRUE)) %>% ggplot(aes(x = day_of_week)) + geom_bar()
.csv file
Lots of interesting data lives out on the web as HTML tables.
An R package for harvesting HTML data.
library(rvest)

# Address of the page to scrape.
url <- "https://www.nytimes.com/interactive/2018/sports/olympics/medal-count-results-schedule.html?smid=tw-nytimes&smtyp=cur"

# Download and parse the page into an XML document.
url %>%
  read_html()
## {xml_document}
## <html lang="en" class="no-js page-interactive section-sports page-theme-standard tone-news page-interactive-default limit-small layout-xlarge app-interactive" itemid="https://www.nytimes.com/interactive/2018/sports/olympics/medal-count-results-schedule.html" itemtype="http://schema.org/NewsArticle" itemscope="" xmlns:og="http://opengraphprotocol.org/schema/">
## [1] <head>\n<meta http-equiv="Content-Type" content="text/html; charset= ...
## [2] <body>\n<style>\n.lt-ie10 .messenger.suggestions {\n display: block ...
tables <- url %>%
read_html() %>%
html_nodes("table")
class(tables)
## [1] "xml_nodeset"
length(tables)
## [1] 2
html_table(tables[[1]])
## X1 X2 X3 X4 X5 ## 1 Medal Count Gold Silver Bronze Total ## 2 NorwayNOR 12 11 9 32 ## 3 GermanyGER 11 7 5 23 ## 4 CanadaCAN 9 5 6 20 ## 5 NetherlandsNED 6 5 3 14 ## 6 United StatesUSA 6 3 5 14 ## 7 FranceFRA 5 4 5 14 ## 8 Olympic Athletes From RussiaOAR 0 4 9 13 ## 9 AustriaAUT 4 2 4 10 ## 10 JapanJPN 2 5 3 10 ## 11 ItalyITA 3 2 4 9 ## 12 SwedenSWE 4 4 0 8 ## 13 South KoreaKOR 4 2 2 8 ## 14 SwitzerlandSUI 2 5 1 8 ## 15 ChinaCHN 0 5 2 7 ## 16 Czech RepublicCZE 1 2 3 6 ## 17 BritainGBR 1 0 3 4 ## 18 FinlandFIN 0 0 4 4 ## 19 SlovakiaSVK 1 2 0 3 ## 20 AustraliaAUS 0 2 1 3 ## 21 BelarusBLR 1 1 0 2 ## 22 PolandPOL 1 0 1 2 ## 23 SpainESP 0 0 2 2 ## 24 UkraineUKR 1 0 0 1 ## 25 SloveniaSLO 0 1 0 1 ## 26 KazakhstanKAZ 0 0 1 1 ## 27 LatviaLAT 0 0 1 1 ## 28 LiechtensteinLIE 0 0 1 1 ## 29 AlbaniaALB 0 0 0 0 ## 30 AndorraAND 0 0 0 0 ## 31 ArgentinaARG 0 0 0 0 ## 32 ArmeniaARM 0 0 0 0 ## 33 AzerbaijanAZE 0 0 0 0 ## 34 BelgiumBEL 0 0 0 0 ## 35 BermudaBER 0 0 0 0 ## 36 BoliviaBOL 0 0 0 0 ## 37 Bosnia and HerzegovinaBIH 0 0 0 0 ## 38 BrazilBRA 0 0 0 0 ## 39 BulgariaBUL 0 0 0 0 ## 40 ChileCHI 0 0 0 0 ## 41 ColombiaCOL 0 0 0 0 ## 42 CroatiaCRO 0 0 0 0 ## 43 CyprusCYP 0 0 0 0 ## 44 DenmarkDEN 0 0 0 0 ## 45 EcuadorECU 0 0 0 0 ## 46 EritreaERI 0 0 0 0 ## 47 EstoniaEST 0 0 0 0 ## 48 GeorgiaGEO 0 0 0 0 ## 49 GhanaGHA 0 0 0 0 ## 50 GreeceGRE 0 0 0 0 ## 51 Hong KongHKG 0 0 0 0 ## 52 HungaryHUN 0 0 0 0 ## 53 IcelandISL 0 0 0 0 ## 54 IndiaIND 0 0 0 0 ## 55 IranIRI 0 0 0 0 ## 56 IrelandIRL 0 0 0 0 ## 57 IsraelISR 0 0 0 0 ## 58 JamaicaJAM 0 0 0 0 ## 59 KenyaKEN 0 0 0 0 ## 60 KosovoKOS 0 0 0 0 ## 61 KyrgyzstanKGZ 0 0 0 0 ## 62 LebanonLBN 0 0 0 0 ## 63 LithuaniaLTU 0 0 0 0 ## 64 LuxembourgLUX 0 0 0 0 ## 65 MacedoniaMKD 0 0 0 0 ## 66 MadagascarMAD 0 0 0 0 ## 67 MalaysiaMAS 0 0 0 0 ## 68 MaltaMLT 0 0 0 0 ## 69 MexicoMEX 0 0 0 0 ## 70 MoldovaMDA 0 0 0 0 ## 71 MonacoMON 0 0 0 0 ## 72 MongoliaMGL 0 0 0 0 ## 73 MontenegroMNE 0 0 0 0 ## 74 MoroccoMAR 0 0 0 0 ## 75 New ZealandNZL 0 0 0 0 ## 76 
NigeriaNGR 0 0 0 0 ## 77 North KoreaPRK 0 0 0 0 ## 78 PakistanPAK 0 0 0 0 ## 79 PhilippinesPHI 0 0 0 0 ## 80 PortugalPOR 0 0 0 0 ## 81 Puerto RicoPUR 0 0 0 0 ## 82 RomaniaROU 0 0 0 0 ## 83 San MarinoSMR 0 0 0 0 ## 84 SerbiaSRB 0 0 0 0 ## 85 SingaporeSGP 0 0 0 0 ## 86 South AfricaRSA 0 0 0 0 ## 87 TaiwanTPE 0 0 0 0 ## 88 ThailandTHA 0 0 0 0 ## 89 Timor-LesteTLS 0 0 0 0 ## 90 TogoTOG 0 0 0 0 ## 91 TongaTGA 0 0 0 0 ## 92 TurkeyTUR 0 0 0 0 ## 93 Unified KoreaCOR 0 0 0 0 ## 94 UzbekistanUZB 0 0 0 0
html_table(tables[[2]])
## X1 ## 1 Alpine Skiing ## 2 Men’s Alpine Combined ## 3 Women’s Giant Slalom ## 4 Men’s Downhill ## 5 Women’s Slalom ## 6 Men’s Super-G ## 7 Women’s Super-G ## 8 Men’s Giant Slalom ## 9 Women’s Downhill ## 10 Men’s Slalom ## 11 Women’s Alpine Combined ## 12 Alpine Team Event ## 13 Biathlon ## 14 Women’s 7.5km Sprint ## 15 Men’s 10km Sprint ## 16 Women’s 10km Pursuit ## 17 Men’s 12.5km Pursuit ## 18 Women’s 15km Individual ## 19 Men’s 20km Individual ## 20 Women’s 12.5km Mass Start ## 21 Men’s 15km Mass Start ## 22 Mixed Relay ## 23 Women’s 4x6km Relay ## 24 Men’s 4x7.5km Relay ## 25 Bobsled ## 26 Two-man ## 27 Two-woman ## 28 Four-man ## 29 Cross-Country Skiing ## 30 Women’s 7.5km + 7.5km Skiathlon ## 31 Men’s 15km + 15km Skiathlon ## 32 Women’s Sprint Classic ## 33 Men’s Sprint Classic ## 34 Women’s 10km Free ## 35 Men’s 15km Free ## 36 Women’s 4 x 5km Relay ## 37 Men’s 4 x 10km Relay ## 38 Women’s Team Sprint Free ## 39 Men’s Team Sprint Free ## 40 Men’s 50km Mass Start Classic ## 41 Women’s 30km Mass Start Classic ## 42 Curling ## 43 Mixed Doubles’ Tournament ## 44 Men’s Tournament ## 45 Women’s Tournament ## 46 Figure Skating ## 47 Team Event ## 48 Pairs Event ## 49 Men’s Singles ## 50 Ice Dance ## 51 Women’s Singles ## 52 Freestyle Skiing ## 53 Women’s Moguls ## 54 Men’s Moguls ## 55 Women’s Aerials ## 56 Women’s Ski Slopestyle ## 57 Men’s Ski Slopestyle ## 58 Men’s Aerials ## 59 Women’s Ski Halfpipe ## 60 Men’s Ski Cross ## 61 Men’s Ski Halfpipe ## 62 Women’s Ski Cross ## 63 Ice Hockey ## 64 Women’s Tournament ## 65 Men’s Tournament ## 66 Luge ## 67 Men’s Singles ## 68 Women’s Singles ## 69 Doubles ## 70 Team Relay ## 71 Nordic Combined ## 72 Individual Gundersen Normal Hill ## 73 Individual Gundersen Large Hill ## 74 Team Gundersen Large Hill ## 75 Short-Track Speedskating ## 76 Men’s 1,500m ## 77 Women’s 500m ## 78 Women’s 1,500m ## 79 Men’s 1,000m ## 80 Women’s 3,000m Relay ## 81 Men’s 500m ## 82 Women’s 1,000m ## 83 Men’s 5,000m Relay ## 84 Skeleton ## 
85 Men’s Event ## 86 Women’s Event ## 87 Ski Jumping ## 88 Men’s Normal Hill Individual ## 89 Women’s Normal Hill Individual ## 90 Men’s Large Hill Individual ## 91 Men’s Team ## 92 Snowboard ## 93 Men’s Slopestyle ## 94 Women’s Slopestyle ## 95 Women’s Halfpipe ## 96 Men’s Halfpipe ## 97 Men’s Snowboard Cross ## 98 Women’s Snowboard Cross ## 99 Women’s Big Air ## 100 Men’s Big Air ## 101 Women’s Parallel Giant Slalom ## 102 Men’s Parallel Giant Slalom ## 103 Speedskating ## 104 Women’s 3,000m ## 105 Men’s 5,000m ## 106 Women’s 1,500m ## 107 Men’s 1,500m ## 108 Women’s 1,000m ## 109 Men’s 10,000m ## 110 Women’s 5,000m ## 111 Women’s 500m ## 112 Men’s 500m ## 113 Women’s Team Pursuit ## 114 Men’s Team Pursuit ## 115 Men’s 1,000m ## 116 Women’s Mass Start ## 117 Men’s Mass Start ## X2 ## 1 Alpine Skiing ## 2 Marcel HirscherAustriaAUT ## 3 Mikaela ShiffrinUnited StatesUSA ## 4 Aksel Lund SvindalNorwayNOR ## 5 Frida HansdotterSwedenSWE ## 6 Matthias MayerAustriaAUT ## 7 Ester LedeckaCzech RepublicCZE ## 8 Marcel HirscherAustriaAUT ## 9 Sofia GoggiaItalyITA ## 10 Feb. 21, 8:00 p.m. E.T. ## 11 Feb. 21, 9:30 p.m. E.T. ## 12 Feb. 23, 10:34 p.m. E.T. ## 13 Biathlon ## 14 Laura DahlmeierGermanyGER ## 15 Arnd PeifferGermanyGER ## 16 Laura DahlmeierGermanyGER ## 17 Martin FourcadeFranceFRA ## 18 Hanna OebergSwedenSWE ## 19 Johannes Thingnes BoeNorwayNOR ## 20 Anastasiya KuzminaSlovakiaSVK ## 21 Martin FourcadeFranceFRA ## 22 FranceFRA ## 23 Feb. 22, 6:15 a.m. E.T. ## 24 Feb. 23, 6:15 a.m. E.T. ## 25 Bobsled ## 26 CanadaCANGermanyGER ## 27 Feb. 21, 8:00 a.m. E.T. ## 28 Feb. 24, 9:15 p.m. E.T. ## 29 Cross-Country Skiing ## 30 Charlotte KallaSwedenSWE ## 31 Simen Hegstad KruegerNorwayNOR ## 32 Stina NilssonSwedenSWE ## 33 Johannes Hoesflot KlaeboNorwayNOR ## 34 Ragnhild HagaNorwayNOR ## 35 Dario ColognaSwitzerlandSUI ## 36 NorwayNOR ## 37 NorwayNOR ## 38 United StatesUSA ## 39 NorwayNOR ## 40 Feb. 24, 12:00 a.m. E.T. ## 41 Feb. 25, 1:15 a.m. E.T. 
## 42 Curling ## 43 CanadaCAN ## 44 Feb. 24, 1:35 a.m. E.T. ## 45 Feb. 24, 7:05 p.m. E.T. ## 46 Figure Skating ## 47 CanadaCAN ## 48 GermanyGER ## 49 Yuzuru HanyuJapanJPN ## 50 CanadaCAN ## 51 Russia’s Alina Zagitova leads going into the long program. Read our coverage here. ## 52 Freestyle Skiing ## 53 Perrine LaffontFranceFRA ## 54 Mikael KingsburyCanadaCAN ## 55 Hanna HuskovaBelarusBLR ## 56 Sarah HoefflinSwitzerlandSUI ## 57 Oystein BraatenNorwayNOR ## 58 Oleksandr AbramenkoUkraineUKR ## 59 Cassie SharpeCanadaCAN ## 60 Brady LemanCanadaCAN ## 61 Feb. 21, 9:30 p.m. E.T. ## 62 Feb. 22, 9:20 p.m. E.T. ## 63 Ice Hockey ## 64 Feb. 21, 11:10 p.m. E.T. ## 65 Feb. 24, 11:10 p.m. E.T. ## 66 Luge ## 67 David GleirscherAustriaAUT ## 68 Natalie GeisenbergerGermanyGER ## 69 GermanyGER ## 70 GermanyGER ## 71 Nordic Combined ## 72 Eric FrenzelGermanyGER ## 73 Johannes RydzekGermanyGER ## 74 Feb. 22, 5:20 a.m. E.T. ## 75 Short-Track Speedskating ## 76 Hyojun LimSouth KoreaKOR ## 77 Arianna FontanaItalyITA ## 78 Choi MinjeongSouth KoreaKOR ## 79 Samuel GirardCanadaCAN ## 80 South KoreaKOR ## 81 Feb. 22, 6:15 a.m. E.T. ## 82 Feb. 22, 6:29 a.m. E.T. ## 83 Feb. 22, 7:00 a.m. E.T. ## 84 Skeleton ## 85 Yun SungbinSouth KoreaKOR ## 86 Lizzy YarnoldBritainGBR ## 87 Ski Jumping ## 88 Andreas WellingerGermanyGER ## 89 Maren LundbyNorwayNOR ## 90 Kamil StochPolandPOL ## 91 NorwayNOR ## 92 Snowboard ## 93 Red GerardUnited StatesUSA ## 94 Jamie AndersonUnited StatesUSA ## 95 Chloe KimUnited StatesUSA ## 96 Shaun WhiteUnited StatesUSA ## 97 Pierre VaultierFranceFRA ## 98 Michela MoioliItalyITA ## 99 Feb. 21, 7:30 p.m. E.T. ## 100 Feb. 23, 8:00 p.m. E.T. ## 101 Feb. 24, 12:30 a.m. E.T. ## 102 Feb. 24, 12:37 a.m. E.T. 
## 103 Speedskating ## 104 Carlijn AchtereekteNetherlandsNED ## 105 Sven KramerNetherlandsNED ## 106 Ireen WustNetherlandsNED ## 107 Kjeld NuisNetherlandsNED ## 108 Jorien ter MorsNetherlandsNED ## 109 Ted-Jan BloemenCanadaCAN ## 110 Esmee VisserNetherlandsNED ## 111 Nao KodairaJapanJPN ## 112 Havard LorentzenNorwayNOR ## 113 Feb. 21, 7:58 a.m. E.T. ## 114 Feb. 21, 8:17 a.m. E.T. ## 115 Feb. 23, 5:00 a.m. E.T. ## 116 Feb. 24, 7:30 a.m. E.T. ## 117 Feb. 24, 8:00 a.m. E.T. ## X3 ## 1 Alpine Skiing ## 2 Alexis PinturaultFranceFRA ## 3 Ragnhild MowinckelNorwayNOR ## 4 Kjetil JansrudNorwayNOR ## 5 Wendy HoldenerSwitzerlandSUI ## 6 Beat FeuzSwitzerlandSUI ## 7 Anna VeithAustriaAUT ## 8 Henrik KristoffersenNorwayNOR ## 9 Ragnhild MowinckelNorwayNOR ## 10 Feb. 21, 8:00 p.m. E.T. ## 11 Feb. 21, 9:30 p.m. E.T. ## 12 Feb. 23, 10:34 p.m. E.T. ## 13 Biathlon ## 14 Marte OlsbuNorwayNOR ## 15 Michal KrcmarCzech RepublicCZE ## 16 Anastasiya KuzminaSlovakiaSVK ## 17 Sebastian SamuelssonSwedenSWE ## 18 Anastasiya KuzminaSlovakiaSVK ## 19 Jakov FakSloveniaSLO ## 20 Darya DomrachevaBelarusBLR ## 21 Simon SchemppGermanyGER ## 22 NorwayNOR ## 23 Feb. 22, 6:15 a.m. E.T. ## 24 Feb. 23, 6:15 a.m. E.T. ## 25 Bobsled ## 26 ## 27 Feb. 21, 8:00 a.m. E.T. ## 28 Feb. 24, 9:15 p.m. E.T. ## 29 Cross-Country Skiing ## 30 Marit BjoergenNorwayNOR ## 31 Martin Johnsrud SundbyNorwayNOR ## 32 Maiken Caspersen FallaNorwayNOR ## 33 Federico PellegrinoItalyITA ## 34 Charlotte KallaSwedenSWE ## 35 Simen Hegstad KruegerNorwayNOR ## 36 SwedenSWE ## 37 Olympic Athletes From RussiaOAR ## 38 SwedenSWE ## 39 Olympic Athletes From RussiaOAR ## 40 Feb. 24, 12:00 a.m. E.T. ## 41 Feb. 25, 1:15 a.m. E.T. ## 42 Curling ## 43 SwitzerlandSUI ## 44 Feb. 24, 1:35 a.m. E.T. ## 45 Feb. 24, 7:05 p.m. E.T. ## 46 Figure Skating ## 47 Olympic Athletes From RussiaOAR ## 48 ChinaCHN ## 49 Shoma UnoJapanJPN ## 50 FranceFRA ## 51 Russia’s Alina Zagitova leads going into the long program. Read our coverage here. 
## 52 Freestyle Skiing ## 53 Justine Dufour-LapointeCanadaCAN ## 54 Matt GrahamAustraliaAUS ## 55 Zhang XinChinaCHN ## 56 Mathilde GremaudSwitzerlandSUI ## 57 Nicholas GoepperUnited StatesUSA ## 58 Jia ZongyangChinaCHN ## 59 Marie MartinodFranceFRA ## 60 Marc BischofbergerSwitzerlandSUI ## 61 Feb. 21, 9:30 p.m. E.T. ## 62 Feb. 22, 9:20 p.m. E.T. ## 63 Ice Hockey ## 64 Feb. 21, 11:10 p.m. E.T. ## 65 Feb. 24, 11:10 p.m. E.T. ## 66 Luge ## 67 Chris MazdzerUnited StatesUSA ## 68 Dajana EitbergerGermanyGER ## 69 AustriaAUT ## 70 CanadaCAN ## 71 Nordic Combined ## 72 Akito WatabeJapanJPN ## 73 Fabian RiessleGermanyGER ## 74 Feb. 22, 5:20 a.m. E.T. ## 75 Short-Track Speedskating ## 76 Sjinkie KnegtNetherlandsNED ## 77 Yara van KerkhofNetherlandsNED ## 78 Li JinyuChinaCHN ## 79 John-Henry KruegerUnited StatesUSA ## 80 ItalyITA ## 81 Feb. 22, 6:15 a.m. E.T. ## 82 Feb. 22, 6:29 a.m. E.T. ## 83 Feb. 22, 7:00 a.m. E.T. ## 84 Skeleton ## 85 Nikita TregubovOlympic Athletes From RussiaOAR ## 86 Jacqueline LoellingGermanyGER ## 87 Ski Jumping ## 88 Johann Andre ForfangNorwayNOR ## 89 Katharina AlthausGermanyGER ## 90 Andreas WellingerGermanyGER ## 91 GermanyGER ## 92 Snowboard ## 93 Max ParrotCanadaCAN ## 94 Laurie BlouinCanadaCAN ## 95 Liu JiayuChinaCHN ## 96 Ayumu HiranoJapanJPN ## 97 Jarryd HughesAustraliaAUS ## 98 Julia Pereira de Sousa MabileauFranceFRA ## 99 Feb. 21, 7:30 p.m. E.T. ## 100 Feb. 23, 8:00 p.m. E.T. ## 101 Feb. 24, 12:30 a.m. E.T. ## 102 Feb. 24, 12:37 a.m. E.T. ## 103 Speedskating ## 104 Ireen WustNetherlandsNED ## 105 Ted-Jan BloemenCanadaCAN ## 106 Miho TakagiJapanJPN ## 107 Patrick RoestNetherlandsNED ## 108 Nao KodairaJapanJPN ## 109 Jorrit BergsmaNetherlandsNED ## 110 Martina SablikovaCzech RepublicCZE ## 111 Lee Sang-HwaSouth KoreaKOR ## 112 Cha Min KyuSouth KoreaKOR ## 113 Feb. 21, 7:58 a.m. E.T. ## 114 Feb. 21, 8:17 a.m. E.T. ## 115 Feb. 23, 5:00 a.m. E.T. ## 116 Feb. 24, 7:30 a.m. E.T. ## 117 Feb. 24, 8:00 a.m. E.T. 
## X4 ## 1 Alpine Skiing ## 2 Victor Muffat-JeandetFranceFRA ## 3 Federica BrignoneItalyITA ## 4 Beat FeuzSwitzerlandSUI ## 5 Katharina GallhuberAustriaAUT ## 6 Kjetil JansrudNorwayNOR ## 7 Tina WeiratherLiechtensteinLIE ## 8 Alexis PinturaultFranceFRA ## 9 Lindsey VonnUnited StatesUSA ## 10 Feb. 21, 8:00 p.m. E.T. ## 11 Feb. 21, 9:30 p.m. E.T. ## 12 Feb. 23, 10:34 p.m. E.T. ## 13 Biathlon ## 14 Veronika VitkovaCzech RepublicCZE ## 15 Dominik WindischItalyITA ## 16 Anais BescondFranceFRA ## 17 Benedikt DollGermanyGER ## 18 Laura DahlmeierGermanyGER ## 19 Dominik LandertingerAustriaAUT ## 20 Tiril EckhoffNorwayNOR ## 21 Emil Hegle SvendsenNorwayNOR ## 22 ItalyITA ## 23 Feb. 22, 6:15 a.m. E.T. ## 24 Feb. 23, 6:15 a.m. E.T. ## 25 Bobsled ## 26 LatviaLAT ## 27 Feb. 21, 8:00 a.m. E.T. ## 28 Feb. 24, 9:15 p.m. E.T. ## 29 Cross-Country Skiing ## 30 Krista ParmakoskiFinlandFIN ## 31 Hans Christer HolundNorwayNOR ## 32 Yulia BelorukovaOlympic Athletes From RussiaOAR ## 33 Alexander BolshunovOlympic Athletes From RussiaOAR ## 34 Marit BjoergenNorwayNORKrista ParmakoskiFinlandFIN ## 35 Denis SpitsovOlympic Athletes From RussiaOAR ## 36 Olympic Athletes From RussiaOAR ## 37 FranceFRA ## 38 NorwayNOR ## 39 FranceFRA ## 40 Feb. 24, 12:00 a.m. E.T. ## 41 Feb. 25, 1:15 a.m. E.T. ## 42 Curling ## 43 Olympic Athletes From RussiaOAR ## 44 Feb. 24, 1:35 a.m. E.T. ## 45 Feb. 24, 7:05 p.m. E.T. ## 46 Figure Skating ## 47 United StatesUSA ## 48 CanadaCAN ## 49 Javier FernandezSpainESP ## 50 United StatesUSA ## 51 Russia’s Alina Zagitova leads going into the long program. Read our coverage here. ## 52 Freestyle Skiing ## 53 Yulia GalyshevaKazakhstanKAZ ## 54 Daichi HaraJapanJPN ## 55 Kong FanyuChinaCHN ## 56 Isabel AtkinBritainGBR ## 57 Alex Beaulieu-MarchandCanadaCAN ## 58 Ilia BurovOlympic Athletes From RussiaOAR ## 59 Brita SigourneyUnited StatesUSA ## 60 Sergey RidzikOlympic Athletes From RussiaOAR ## 61 Feb. 21, 9:30 p.m. E.T. ## 62 Feb. 22, 9:20 p.m. E.T. ## 63 Ice Hockey ## 64 Feb. 
21, 11:10 p.m. E.T. ## 65 Feb. 24, 11:10 p.m. E.T. ## 66 Luge ## 67 Johannes LudwigGermanyGER ## 68 Alex GoughCanadaCAN ## 69 GermanyGER ## 70 AustriaAUT ## 71 Nordic Combined ## 72 Lukas KlapferAustriaAUT ## 73 Eric FrenzelGermanyGER ## 74 Feb. 22, 5:20 a.m. E.T. ## 75 Short-Track Speedskating ## 76 Semen ElistratovOlympic Athletes From RussiaOAR ## 77 Kim BoutinCanadaCAN ## 78 Kim BoutinCanadaCAN ## 79 Seo YiraSouth KoreaKOR ## 80 NetherlandsNED ## 81 Feb. 22, 6:15 a.m. E.T. ## 82 Feb. 22, 6:29 a.m. E.T. ## 83 Feb. 22, 7:00 a.m. E.T. ## 84 Skeleton ## 85 Dom ParsonsBritainGBR ## 86 Laura DeasBritainGBR ## 87 Ski Jumping ## 88 Robert JohanssonNorwayNOR ## 89 Sara TakanashiJapanJPN ## 90 Robert JohanssonNorwayNOR ## 91 PolandPOL ## 92 Snowboard ## 93 Mark McMorrisCanadaCAN ## 94 Enni RukajarviFinlandFIN ## 95 Arielle GoldUnited StatesUSA ## 96 Scotty JamesAustraliaAUS ## 97 Regino HernandezSpainESP ## 98 Eva SamkovaCzech RepublicCZE ## 99 Feb. 21, 7:30 p.m. E.T. ## 100 Feb. 23, 8:00 p.m. E.T. ## 101 Feb. 24, 12:30 a.m. E.T. ## 102 Feb. 24, 12:37 a.m. E.T. ## 103 Speedskating ## 104 Antoinette de JongNetherlandsNED ## 105 Sverre Lunde PedersenNorwayNOR ## 106 Marrit LeenstraNetherlandsNED ## 107 Kim Min SeokSouth KoreaKOR ## 108 Miho TakagiJapanJPN ## 109 Nicola TumoleroItalyITA ## 110 Natalia VoroninaOlympic Athletes From RussiaOAR ## 111 Karolina ErbanovaCzech RepublicCZE ## 112 Gao TingyuChinaCHN ## 113 Feb. 21, 7:58 a.m. E.T. ## 114 Feb. 21, 8:17 a.m. E.T. ## 115 Feb. 23, 5:00 a.m. E.T. ## 116 Feb. 24, 7:30 a.m. E.T. ## 117 Feb. 24, 8:00 a.m. E.T.
Bring in medal count as data frame.
# Convert the first scraped table node into a data frame, using its first
# row as the column header, then preview the top three rows.
tab1 <- html_table(tables[[1]], header = TRUE)
head(tab1, n = 3)
## Medal Count Gold Silver Bronze Total ## 1 NorwayNOR 12 11 9 32 ## 2 GermanyGER 11 7 5 23 ## 3 CanadaCAN 9 5 6 20
Todo: isolate the country name.
# Split "Medal Count" (e.g. "NorwayNOR") three characters from the end:
# the leading part becomes `country`, the trailing part `country_code`.
tab1_sep <- separate(
  tab1,
  col = "Medal Count",
  into = c("country", "country_code"),
  sep = -3
)
head(tab1_sep)
## country country_code Gold Silver Bronze Total ## 1 Norway NOR 12 11 9 32 ## 2 Germany GER 11 7 5 23 ## 3 Canada CAN 9 5 6 20 ## 4 Netherlands NED 6 5 3 14 ## 5 United States USA 6 3 5 14 ## 6 France FRA 5 4 5 14
Reshape data into tall format in preparation for ggplot.
# Stack the Gold/Silver/Bronze count columns into medal/count pairs, then
# repeat each row `count` times so that every row stands for a single medal.
olympics <- tab1_sep %>%
  gather(Gold:Bronze, key = "medal", value = "count") %>%
  uncount(count)
olympics
## country country_code Total medal ## 1 Norway NOR 32 Gold ## 1.1 Norway NOR 32 Gold ## 1.2 Norway NOR 32 Gold ## 1.3 Norway NOR 32 Gold ## 1.4 Norway NOR 32 Gold ## 1.5 Norway NOR 32 Gold ## 1.6 Norway NOR 32 Gold ## 1.7 Norway NOR 32 Gold ## 1.8 Norway NOR 32 Gold ## 1.9 Norway NOR 32 Gold ## 1.10 Norway NOR 32 Gold ## 1.11 Norway NOR 32 Gold ## 2 Germany GER 23 Gold ## 2.1 Germany GER 23 Gold ## 2.2 Germany GER 23 Gold ## 2.3 Germany GER 23 Gold ## 2.4 Germany GER 23 Gold ## 2.5 Germany GER 23 Gold ## 2.6 Germany GER 23 Gold ## 2.7 Germany GER 23 Gold ## 2.8 Germany GER 23 Gold ## 2.9 Germany GER 23 Gold ## 2.10 Germany GER 23 Gold ## 3 Canada CAN 20 Gold ## 3.1 Canada CAN 20 Gold ## 3.2 Canada CAN 20 Gold ## 3.3 Canada CAN 20 Gold ## 3.4 Canada CAN 20 Gold ## 3.5 Canada CAN 20 Gold ## 3.6 Canada CAN 20 Gold ## 3.7 Canada CAN 20 Gold ## 3.8 Canada CAN 20 Gold ## 4 Netherlands NED 14 Gold ## 4.1 Netherlands NED 14 Gold ## 4.2 Netherlands NED 14 Gold ## 4.3 Netherlands NED 14 Gold ## 4.4 Netherlands NED 14 Gold ## 4.5 Netherlands NED 14 Gold ## 5 United States USA 14 Gold ## 5.1 United States USA 14 Gold ## 5.2 United States USA 14 Gold ## 5.3 United States USA 14 Gold ## 5.4 United States USA 14 Gold ## 5.5 United States USA 14 Gold ## 6 France FRA 14 Gold ## 6.1 France FRA 14 Gold ## 6.2 France FRA 14 Gold ## 6.3 France FRA 14 Gold ## 6.4 France FRA 14 Gold ## 8 Austria AUT 10 Gold ## 8.1 Austria AUT 10 Gold ## 8.2 Austria AUT 10 Gold ## 8.3 Austria AUT 10 Gold ## 9 Japan JPN 10 Gold ## 9.1 Japan JPN 10 Gold ## 10 Italy ITA 9 Gold ## 10.1 Italy ITA 9 Gold ## 10.2 Italy ITA 9 Gold ## 11 Sweden SWE 8 Gold ## 11.1 Sweden SWE 8 Gold ## 11.2 Sweden SWE 8 Gold ## 11.3 Sweden SWE 8 Gold ## 12 South Korea KOR 8 Gold ## 12.1 South Korea KOR 8 Gold ## 12.2 South Korea KOR 8 Gold ## 12.3 South Korea KOR 8 Gold ## 13 Switzerland SUI 8 Gold ## 13.1 Switzerland SUI 8 Gold ## 15 Czech Republic CZE 6 Gold ## 16 Britain GBR 4 Gold ## 18 Slovakia SVK 3 Gold ## 20 Belarus BLR 2 
Gold ## 21 Poland POL 2 Gold ## 23 Ukraine UKR 1 Gold ## 94 Norway NOR 32 Silver ## 94.1 Norway NOR 32 Silver ## 94.2 Norway NOR 32 Silver ## 94.3 Norway NOR 32 Silver ## 94.4 Norway NOR 32 Silver ## 94.5 Norway NOR 32 Silver ## 94.6 Norway NOR 32 Silver ## 94.7 Norway NOR 32 Silver ## 94.8 Norway NOR 32 Silver ## 94.9 Norway NOR 32 Silver ## 94.10 Norway NOR 32 Silver ## 95 Germany GER 23 Silver ## 95.1 Germany GER 23 Silver ## 95.2 Germany GER 23 Silver ## 95.3 Germany GER 23 Silver ## 95.4 Germany GER 23 Silver ## 95.5 Germany GER 23 Silver ## 95.6 Germany GER 23 Silver ## 96 Canada CAN 20 Silver ## 96.1 Canada CAN 20 Silver ## 96.2 Canada CAN 20 Silver ## 96.3 Canada CAN 20 Silver ## 96.4 Canada CAN 20 Silver ## 97 Netherlands NED 14 Silver ## 97.1 Netherlands NED 14 Silver ## 97.2 Netherlands NED 14 Silver ## 97.3 Netherlands NED 14 Silver ## 97.4 Netherlands NED 14 Silver ## 98 United States USA 14 Silver ## 98.1 United States USA 14 Silver ## 98.2 United States USA 14 Silver ## 99 France FRA 14 Silver ## 99.1 France FRA 14 Silver ## 99.2 France FRA 14 Silver ## 99.3 France FRA 14 Silver ## 100 Olympic Athletes From Russia OAR 13 Silver ## 100.1 Olympic Athletes From Russia OAR 13 Silver ## 100.2 Olympic Athletes From Russia OAR 13 Silver ## 100.3 Olympic Athletes From Russia OAR 13 Silver ## 101 Austria AUT 10 Silver ## 101.1 Austria AUT 10 Silver ## 102 Japan JPN 10 Silver ## 102.1 Japan JPN 10 Silver ## 102.2 Japan JPN 10 Silver ## 102.3 Japan JPN 10 Silver ## 102.4 Japan JPN 10 Silver ## 103 Italy ITA 9 Silver ## 103.1 Italy ITA 9 Silver ## 104 Sweden SWE 8 Silver ## 104.1 Sweden SWE 8 Silver ## 104.2 Sweden SWE 8 Silver ## 104.3 Sweden SWE 8 Silver ## 105 South Korea KOR 8 Silver ## 105.1 South Korea KOR 8 Silver ## 106 Switzerland SUI 8 Silver ## 106.1 Switzerland SUI 8 Silver ## 106.2 Switzerland SUI 8 Silver ## 106.3 Switzerland SUI 8 Silver ## 106.4 Switzerland SUI 8 Silver ## 107 China CHN 7 Silver ## 107.1 China CHN 7 Silver ## 107.2 China CHN 7 
Silver ## 107.3 China CHN 7 Silver ## 107.4 China CHN 7 Silver ## 108 Czech Republic CZE 6 Silver ## 108.1 Czech Republic CZE 6 Silver ## 111 Slovakia SVK 3 Silver ## 111.1 Slovakia SVK 3 Silver ## 112 Australia AUS 3 Silver ## 112.1 Australia AUS 3 Silver ## 113 Belarus BLR 2 Silver ## 117 Slovenia SLO 1 Silver ## 187 Norway NOR 32 Bronze ## 187.1 Norway NOR 32 Bronze ## 187.2 Norway NOR 32 Bronze ## 187.3 Norway NOR 32 Bronze ## 187.4 Norway NOR 32 Bronze ## 187.5 Norway NOR 32 Bronze ## 187.6 Norway NOR 32 Bronze ## 187.7 Norway NOR 32 Bronze ## 187.8 Norway NOR 32 Bronze ## 188 Germany GER 23 Bronze ## 188.1 Germany GER 23 Bronze ## 188.2 Germany GER 23 Bronze ## 188.3 Germany GER 23 Bronze ## 188.4 Germany GER 23 Bronze ## 189 Canada CAN 20 Bronze ## 189.1 Canada CAN 20 Bronze ## 189.2 Canada CAN 20 Bronze ## 189.3 Canada CAN 20 Bronze ## 189.4 Canada CAN 20 Bronze ## 189.5 Canada CAN 20 Bronze ## 190 Netherlands NED 14 Bronze ## 190.1 Netherlands NED 14 Bronze ## 190.2 Netherlands NED 14 Bronze ## 191 United States USA 14 Bronze ## 191.1 United States USA 14 Bronze ## 191.2 United States USA 14 Bronze ## 191.3 United States USA 14 Bronze ## 191.4 United States USA 14 Bronze ## 192 France FRA 14 Bronze ## 192.1 France FRA 14 Bronze ## 192.2 France FRA 14 Bronze ## 192.3 France FRA 14 Bronze ## 192.4 France FRA 14 Bronze ## 193 Olympic Athletes From Russia OAR 13 Bronze ## 193.1 Olympic Athletes From Russia OAR 13 Bronze ## 193.2 Olympic Athletes From Russia OAR 13 Bronze ## 193.3 Olympic Athletes From Russia OAR 13 Bronze ## 193.4 Olympic Athletes From Russia OAR 13 Bronze ## 193.5 Olympic Athletes From Russia OAR 13 Bronze ## 193.6 Olympic Athletes From Russia OAR 13 Bronze ## 193.7 Olympic Athletes From Russia OAR 13 Bronze ## 193.8 Olympic Athletes From Russia OAR 13 Bronze ## 194 Austria AUT 10 Bronze ## 194.1 Austria AUT 10 Bronze ## 194.2 Austria AUT 10 Bronze ## 194.3 Austria AUT 10 Bronze ## 195 Japan JPN 10 Bronze ## 195.1 Japan JPN 10 Bronze ## 195.2 
Japan JPN 10 Bronze ## 196 Italy ITA 9 Bronze ## 196.1 Italy ITA 9 Bronze ## 196.2 Italy ITA 9 Bronze ## 196.3 Italy ITA 9 Bronze ## 198 South Korea KOR 8 Bronze ## 198.1 South Korea KOR 8 Bronze ## 199 Switzerland SUI 8 Bronze ## 200 China CHN 7 Bronze ## 200.1 China CHN 7 Bronze ## 201 Czech Republic CZE 6 Bronze ## 201.1 Czech Republic CZE 6 Bronze ## 201.2 Czech Republic CZE 6 Bronze ## 202 Britain GBR 4 Bronze ## 202.1 Britain GBR 4 Bronze ## 202.2 Britain GBR 4 Bronze ## 203 Finland FIN 4 Bronze ## 203.1 Finland FIN 4 Bronze ## 203.2 Finland FIN 4 Bronze ## 203.3 Finland FIN 4 Bronze ## 205 Australia AUS 3 Bronze ## 207 Poland POL 2 Bronze ## 208 Spain ESP 2 Bronze ## 208.1 Spain ESP 2 Bronze ## 211 Kazakhstan KAZ 1 Bronze ## 212 Latvia LAT 1 Bronze ## 213 Liechtenstein LIE 1 Bronze
Let's plot.
# Bar chart with one bar per country; geom_bar() counts the rows per country.
ggplot(olympics, aes(x = country)) +
  geom_bar()
Long names are easier on the y-axis.
# Same bar chart with the axes flipped so the country names sit on the y-axis.
ggplot(olympics, aes(x = country)) +
  geom_bar() +
  coord_flip()
There are two approaches to order the bars:
ggplot() directly. Let's do 2, which requires talking about factors().
Factors are a way to represent categorical data as integers that are looked up in a linked table of levels.
# Convert the country column to a factor and confirm its class.
a <- factor(olympics$country)
class(a)
## [1] "factor"
# List the levels of the factor.
levels(a)
## [1] "Australia" "Austria" ## [3] "Belarus" "Britain" ## [5] "Canada" "China" ## [7] "Czech Republic" "Finland" ## [9] "France" "Germany" ## [11] "Italy" "Japan" ## [13] "Kazakhstan" "Latvia" ## [15] "Liechtenstein" "Netherlands" ## [17] "Norway" "Olympic Athletes From Russia" ## [19] "Poland" "Slovakia" ## [21] "Slovenia" "South Korea" ## [23] "Spain" "Sweden" ## [25] "Switzerland" "Ukraine" ## [27] "United States"
# Tabulate the number of rows per level and show the last few levels.
tail(table(a))
## a ## South Korea Spain Sweden Switzerland Ukraine ## 8 2 8 8 1 ## United States ## 14
# Rebuild the factor with one extra level, "Oregon", that never occurs in
# the data; table() still reports it, with a count of zero.
b <- factor(olympics$country, levels = append(levels(a), "Oregon"))
tail(table(b))
## b ## Spain Sweden Switzerland Ukraine United States ## 2 8 8 1 14 ## Oregon ## 0
Let's use the factor class, but then set the order of the levels based on the Total number of medals.
# Make country a factor whose levels are ordered by the Total medal count,
# then inspect the resulting level order.
olympics <- olympics %>%
  mutate(country = factor(country)) %>%
  mutate(country = reorder(country, Total))
levels(olympics$country)
## [1] "Kazakhstan" "Latvia" ## [3] "Liechtenstein" "Slovenia" ## [5] "Ukraine" "Belarus" ## [7] "Poland" "Spain" ## [9] "Australia" "Slovakia" ## [11] "Britain" "Finland" ## [13] "Czech Republic" "China" ## [15] "South Korea" "Sweden" ## [17] "Switzerland" "Italy" ## [19] "Austria" "Japan" ## [21] "Olympic Athletes From Russia" "France" ## [23] "Netherlands" "United States" ## [25] "Canada" "Germany" ## [27] "Norway"
# Redraw the flipped bar chart; bars now follow the reordered country levels.
ggplot(olympics, aes(x = country)) +
  geom_bar() +
  coord_flip()
How can we indicate the type of medal?
Map the fill to the medal column.
# Map the fill color to the medal column so each bar is split by medal type.
ggplot(olympics, aes(x = country, fill = medal)) +
  geom_bar() +
  coord_flip()
Now what's wrong?
Gotta fix the ordering of the medals.
# Turn medal into a factor with an explicit Gold, Silver, Bronze level order,
# then confirm the levels.
olympics <- mutate(
  olympics,
  medal = factor(medal, levels = c("Gold", "Silver", "Bronze"))
)
levels(olympics$medal)
## [1] "Gold" "Silver" "Bronze"
# Redraw the filled bar chart now that the medal levels are ordered.
ggplot(olympics, aes(x = country, fill = medal)) +
  geom_bar() +
  coord_flip()
But those colors….
Colors are represented by name ("papayawhip"), RGB values, or hex codes.
# Fill colors as hex codes, keyed by medal level name.
cols <- c(
  Gold = "#CFB53B",
  Silver = "#E6E8FA",
  Bronze = "#8C7853"
)
# Flipped bar chart filled by medal, using the manual color palette, the
# black-and-white theme, and an empty label on the country axis.
ggplot(olympics, aes(x = country, fill = medal)) +
  geom_bar() +
  coord_flip() +
  scale_fill_manual(values = cols) +
  theme_bw() +
  xlab("")
# Scrape the New York Times 2018 Olympics medal table and plot the medals
# per country, colored by medal type.
url <- "https://www.nytimes.com/interactive/2018/sports/olympics/medal-count-results-schedule.html?smid=tw-nytimes&smtyp=cur"

# Timestamp of the scrape; its month and day are shown in the plot subtitle.
now <- now()

# Collect every <table> node on the page; the first one is used below.
tables <- url %>%
  read_html() %>%
  html_nodes("table")

# Tidy the table: split the fused country name and code, stack the medal
# columns, expand to one row per medal, and order the factor levels used
# by the plot.
olympics <- html_table(tables[[1]], header = TRUE) %>%
  separate("Medal Count",
           into = c("country", "country_code"),
           sep = -3) %>%
  gather(Gold:Bronze, key = "medal", value = "count") %>%
  uncount(count) %>%
  mutate(country = factor(country)) %>%
  mutate(country = reorder(country, Total)) %>%
  mutate(medal = factor(medal, levels = c("Gold", "Silver", "Bronze")))

# Fill colors as hex codes, keyed by medal level name.
cols <- c("Gold" = "#CFB53B",
          "Silver" = "#E6E8FA",
          "Bronze" = "#8C7853")

olympics %>%
  ggplot(aes(x = country, fill = medal)) +
  geom_bar() +
  coord_flip() +
  scale_fill_manual(values = cols) +
  theme_bw() +
  xlab("") +
  labs(title = "2018 Olympic Medals",
       # paste0() with explicit spaces avoids the stray blank that paste()
       # would insert before the closing parenthesis.
       subtitle = paste0("(as of ",
                         month(now, label = TRUE, abbr = FALSE), " ",
                         day(now), ")"))
rvest. Advice: Be wary when web scraping! It has several notable downsides:
Best alternative: application programming interface (API).